In [13]:
import numpy as np
from sklearn import linear_model
from sklearn import datasets
import matplotlib.pyplot as plt
%matplotlib inline

Generalized Linear Models

1) Ordinary Least Squares


In [40]:
dataset = datasets.load_diabetes()
X = dataset.data[:, np.newaxis, 2]       # single feature (column 2, BMI) as a (442, 1) matrix
Y = dataset.target.reshape(-1, 1)        # targets as a (442, 1) column

print(X.shape)
print(Y.shape)


(442, 1)
(442, 1)
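
The next cell fits the line by batch gradient descent on the squared-error loss L(b, W) = sum((Y - b - X·W)^2). Its gradients, dL/db = -2·sum(Y - y_hat) and dL/dW = -2·X^T·(Y - y_hat), are exactly the db and dW terms in the loop.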

In [30]:
# Implementation: fit OLS by batch gradient descent
b = np.zeros((X.shape[1], 1))   # intercept
W = np.zeros((X.shape[1], 1))   # slope
learning_rate = 0.001
iterations = 10000
for _ in range(iterations):
    y_hat = b.T + np.dot(X, W.T)       # predictions, shape (442, 1)
    err = Y - y_hat                    # residuals
    error = np.sum(np.square(err))     # total squared error (for monitoring)

    db = -2*np.sum(err)                           # dL/db
    dW = -2*np.sum(np.dot(X.T, err), axis = 1)    # dL/dW
    
    b = b - learning_rate*db
    W = W - learning_rate*dW

print(b, W)


[[152.13348416]] [[949.43525847]]
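
As a cross-check, OLS also has a closed-form solution via the normal equations, equivalent to what sklearn's LinearRegression computes below; a minimal sketch (A and theta are my names, not from the original):

# Closed-form OLS: prepend a column of ones to X for the intercept,
# then solve (A^T A) theta = A^T Y for theta = [intercept, slope]
A = np.hstack([np.ones((X.shape[0], 1)), X])
theta = np.linalg.solve(A.T @ A, A.T @ Y)
print(theta)   # should match the gradient-descent and sklearn results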

In [17]:
# Using sklearn
# 'normalize' was removed from recent scikit-learn; False was the default anyway
reg = linear_model.LinearRegression(fit_intercept = True, n_jobs = -1)
reg.fit(X, Y)
print(reg.intercept_, reg.coef_)


[152.13348416] [[949.43526038]]

In [29]:
plt.plot(X, Y, 'bo')                 # observed targets
plt.plot(X, y_hat, color = 'red')    # fitted line
plt.xlabel('X values')
plt.ylabel('Target / predicted values')
plt.show()


2) Ridge Regression
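
Ridge regression adds an L2 penalty alpha·||W||^2 to the squared-error loss, shrinking the coefficients toward zero (compare the slope below with the OLS value above). The penalty contributes the extra 2·alpha·W term in dW; the intercept b is left unpenalized.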


In [51]:
# Implementation: same gradient descent, with alpha*||W||^2 added to the loss
b = np.zeros((X.shape[1], 1))   # intercept (not penalized)
W = np.zeros((X.shape[1], 1))   # slope
learning_rate = 0.001
iterations = 10000
alpha = 0.8

for _ in range(iterations):
    y_hat = b.T + np.dot(X, W.T)
    err = Y - y_hat
    error = np.sum(np.square(err)) + alpha*np.sum(np.square(W))   # penalized loss (for monitoring)

    db = -2*np.sum(err)
    dW = -2*np.sum(np.dot(X.T, err), axis = 1) + 2*alpha*W   # penalty gradient: 2*alpha*W
    
    b = b - learning_rate*db
    W = W - learning_rate*dW

print(b, W)


[[152.13348416]] [[527.46403355]]

In [48]:
# Using sklearn
reg = linear_model.Ridge(alpha = 0.8, fit_intercept = True, max_iter = 10000, solver = 'auto')
reg.fit(X, Y)
print(reg.intercept_, reg.coef_)


[152.13348416] [[527.46403355]]
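
Ridge also has a closed form; a minimal sketch on centered data, mirroring how sklearn handles the unpenalized intercept (Xc, Yc, W_closed, b_closed are my names):

# Closed-form ridge: W = (Xc^T Xc + alpha*I)^{-1} Xc^T Yc on centered data,
# with the intercept recovered from the feature/target means
Xc = X - X.mean(axis = 0)
Yc = Y - Y.mean(axis = 0)
W_closed = np.linalg.solve(Xc.T @ Xc + 0.8*np.eye(X.shape[1]), Xc.T @ Yc)
b_closed = Y.mean() - X.mean(axis = 0) @ W_closed
print(b_closed, W_closed)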

In [52]:
plt.plot(X, Y, 'bo')                 # observed targets
plt.plot(X, y_hat, color = 'red')    # fitted ridge line
plt.xlabel('X values')
plt.ylabel('Target / predicted values')
plt.show()



In [57]:
# To cross-validate alpha in ridge regression by hand,
# wrap the implementation above in a loop over candidate alphas
# and keep the alpha with the best held-out score

# In sklearn use RidgeCV
reg = linear_model.RidgeCV(alphas = (0.1, 0.2, 0.3, 0.4), fit_intercept = True, scoring = None, cv = None, gcv_mode = None)
reg.fit(X, Y)
print(reg.intercept_, reg.coef_, reg.alpha_)


[152.13348416] [[863.12296399]] 0.1
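
The hand-rolled loop suggested in the comment could look like this; a minimal sketch using cross_val_score (the 5-fold split and the name best_alpha are my choices):

from sklearn.model_selection import cross_val_score

alphas = (0.1, 0.2, 0.3, 0.4)
# mean 5-fold CV score (R^2 by default) for each candidate alpha
scores = [cross_val_score(linear_model.Ridge(alpha = a), X, Y.ravel(), cv = 5).mean()
          for a in alphas]
best_alpha = alphas[int(np.argmax(scores))]
print(best_alpha)

Note that RidgeCV with cv = None uses efficient leave-one-out generalized cross-validation rather than k-fold, so the selected alpha can differ slightly.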

3) Lasso


In [ ]:
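The notebook stops here. Following the pattern above, a minimal sklearn sketch for lasso (alpha = 0.8 is a placeholder carried over from the ridge cell, not a tuned value):

# Using sklearn: lasso swaps ridge's squared penalty for alpha*|W|,
# which can drive some coefficients exactly to zero
reg = linear_model.Lasso(alpha = 0.8, fit_intercept = True, max_iter = 10000)
reg.fit(X, Y.ravel())
print(reg.intercept_, reg.coef_)

A gradient-descent version in the style of the earlier cells would need a subgradient for the non-differentiable |W| term (e.g. alpha*np.sign(W) in dW); sklearn's Lasso uses coordinate descent instead.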